{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 皮马印第安人糖尿病患病预测（0-1二分类问题）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 导入必需的包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from numpy import loadtxt\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import cross_validate\n",
    "from sklearn.model_selection import train_test_split\n",
    "from xgboost import XGBClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cv_score_train_test(model, num_cv=5, score_list=None):\n",
    "    \"\"\"Print mean cross-validated scores of `model` on the train and test splits.\n",
    "\n",
    "    The estimator is scored with k-fold cross-validation separately on\n",
    "    (X_train, y_train) and on (X_test, y_test), which are read from the\n",
    "    notebook's global namespace. scikit-learn refits a clone of `model` on\n",
    "    every fold, so the table reflects the model's hyper-parameters rather\n",
    "    than the state of an already fitted instance.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : estimator\n",
    "        scikit-learn compatible classifier, e.g. an XGBClassifier.\n",
    "    num_cv : int, default 5\n",
    "        Number of cross-validation folds.\n",
    "    score_list : list of str, optional\n",
    "        Names of the scorers to report. Defaults to\n",
    "        ['neg_log_loss', 'accuracy', 'f1', 'roc_auc'].\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The table (rows 'Train'/'Test', one column per scorer) is printed.\n",
    "    \"\"\"\n",
    "    if score_list is None:\n",
    "        score_list = ['neg_log_loss', 'accuracy', 'f1', 'roc_auc']\n",
    "\n",
    "    mean_scores = []\n",
    "    for features, labels in ((X_train, y_train), (X_test, y_test)):\n",
    "        # A single cross_validate call evaluates all scorers on the same fitted\n",
    "        # folds, so the model is trained num_cv times instead of\n",
    "        # num_cv * len(score_list) times.\n",
    "        cv_result = cross_validate(\n",
    "            model, features, labels,\n",
    "            cv=num_cv, scoring=score_list, return_train_score=False)\n",
    "        mean_scores.append([cv_result['test_' + name].mean() for name in score_list])\n",
    "\n",
    "    scores_df = pd.DataFrame(mean_scores, index=['Train', 'Test'], columns=score_list)\n",
    "    print(scores_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 分出变量和标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the comma-separated data file into a 2-D NumPy array.\n",
    "dataset = loadtxt('pima-indians-diabetes.csv', delimiter=\",\")\n",
    "\n",
    "# Columns 0-7 are the features, column 8 is the label.\n",
    "# Slices are half-open: the start index is included, the stop index is not.\n",
    "X, Y = dataset[:, :8], dataset[:, 8]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 将数据分为训练集和测试集\n",
    "\n",
    "测试集用来预测，训练集用来学习模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed = 7          # random_state of the split, fixed so the split is reproducible\n",
    "test_size = 0.33  # fraction of the rows held out as the test set\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X,\n",
    "    Y,\n",
    "    test_size=test_size,\n",
    "    random_state=seed,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 使用XGBOOST封装好的分类器\n",
    "\n",
    "全部使用默认参数\n",
    "\n",
    "直接用XGBClassifier 建立模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "       colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,\n",
       "       max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,\n",
       "       n_estimators=100, n_jobs=1, nthread=None,\n",
       "       objective='binary:logistic', random_state=0, reg_alpha=0,\n",
       "       reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,\n",
       "       subsample=1, verbosity=1)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the classifier with its default hyper-parameters and train it on the\n",
    "# training split; fit() returns the estimator, whose repr is the cell output.\n",
    "xgb_clf1 = XGBClassifier().fit(X_train, y_train)\n",
    "xgb_clf1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "先用上面封装的函数查看模型在训练集和测试集上的交叉验证成绩；随后对测试集进行预测，并使用round函数将预测值转化为0/1值（注意：predict 返回的已经是类别标签，而不是概率值）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       neg_log_loss  accuracy        f1   roc_auc\n",
      "Train     -0.502422  0.756721  0.634669  0.818340\n",
      "Test      -0.646176  0.680615  0.536132  0.744753\n"
     ]
    }
   ],
   "source": [
    "# Cross-validated scores of the default model on the train and test splits.\n",
    "cv_score_train_test(model=xgb_clf1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "不使用封装的函数，单独查看xgboost在测试集上的成绩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    }
   ],
   "source": [
    "# Predict on the held-out test split, then round every prediction to a whole number.\n",
    "# NOTE(review): XGBClassifier.predict is documented to return class labels rather\n",
    "# than probabilities, so the rounding is probably redundant - confirm before removing.\n",
    "y_probablity_pred = xgb_clf1.predict(X_test)\n",
    "y_predictions = list(map(round, y_probablity_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看在测试集上的预测精度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 77.95%\n"
     ]
    }
   ],
   "source": [
    "# Share of test rows whose predicted label equals the true label, shown as a percentage.\n",
    "accuracy = accuracy_score(y_true=y_test, y_pred=y_predictions)\n",
    "print(\"Accuracy: %.2f%%\" % (100.0 * accuracy))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.监控模型的表现\n",
    "\n",
    "xgboost 可以在模型训练时，评价模型在测试集上的表现，也可以输出每一步的分数\n",
    "\n",
    "但是需要在 fit 时指定用于评估的数据集（eval_set，这里直接使用了测试集）、early_stopping_rounds 和评价指标（eval_metric）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\tvalidation_0-auc:0.716217\tvalidation_0-logloss:0.690588\n",
      "Multiple eval metrics have been passed: 'validation_0-logloss' will be used for early stopping.\n",
      "\n",
      "Will train until validation_0-logloss hasn't improved in 50 rounds.\n",
      "[50]\tvalidation_0-auc:0.833065\tvalidation_0-logloss:0.584058\n",
      "[100]\tvalidation_0-auc:0.833602\tvalidation_0-logloss:0.532183\n",
      "[150]\tvalidation_0-auc:0.835749\tvalidation_0-logloss:0.505183\n",
      "[200]\tvalidation_0-auc:0.832528\tvalidation_0-logloss:0.492587\n",
      "[250]\tvalidation_0-auc:0.832394\tvalidation_0-logloss:0.485973\n",
      "[300]\tvalidation_0-auc:0.830784\tvalidation_0-logloss:0.484974\n",
      "Stopping. Best iteration:\n",
      "[282]\tvalidation_0-auc:0.831119\tvalidation_0-logloss:0.484596\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "       colsample_bynode=1, colsample_bytree=0.8, gamma=0,\n",
       "       learning_rate=0.01, max_delta_step=0, max_depth=5,\n",
       "       min_child_weight=1, missing=None, n_estimators=1000, n_jobs=1,\n",
       "       nthread=4, objective='binary:logistic', random_state=0, reg_alpha=0,\n",
       "       reg_lambda=1, scale_pos_weight=1, seed=27, silent=None,\n",
       "       subsample=0.8, verbosity=1)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hyper-parameters of the monitored model: small steps (learning_rate=0.01),\n",
    "# up to 1000 boosting rounds, with 80% row and column subsampling.\n",
    "xgb_clf2_params = {\n",
    "    'learning_rate': 0.01,\n",
    "    'n_estimators': 1000,\n",
    "    'max_depth': 5,\n",
    "    'min_child_weight': 1,\n",
    "    'gamma': 0,\n",
    "    'subsample': 0.8,\n",
    "    'colsample_bytree': 0.8,\n",
    "    'objective': 'binary:logistic',\n",
    "    'nthread': 4,\n",
    "    'scale_pos_weight': 1,\n",
    "    'seed': 27,\n",
    "}\n",
    "xgb_clf2 = XGBClassifier(**xgb_clf2_params)\n",
    "\n",
    "# Data scored after every boosting round.\n",
    "# NOTE(review): the test split is also what early stopping watches, so scores\n",
    "# reported on it later are optimistic; a separate validation split avoids that.\n",
    "eval_set = [(X_test, y_test)]\n",
    "\n",
    "# Both metrics are logged every 50 rounds; training stops once logloss has not\n",
    "# improved for 50 rounds (pass eval_metric=\"logloss\" alone to track one metric).\n",
    "xgb_clf2.fit(\n",
    "    X_train,\n",
    "    y_train,\n",
    "    eval_set=eval_set,\n",
    "    eval_metric=[\"auc\", \"logloss\"],\n",
    "    early_stopping_rounds=50,\n",
    "    verbose=50,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.输出特征的重要度\n",
    "\n",
    "gradient boosting 还有一个优点是可以给出训练好的模型的特征重要性\n",
    "\n",
    "**需要引入 xgboost 的 plot_importance 函数，以及用于显示图像的 matplotlib.pyplot**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xt4FPXZ//H3DeHBCDZWAhhOxhRo1QQioOKhZYEGOXhCfRDUCtVKi1qsBQ9PtYC2XkJTH09oEdEWD1ULVKFIEQusPr+2ICgBFYvYmhYQQRAUQqQJuX9/7CQN53DYQ3Y+r+vaKzvfmdm57x24d/Y7s/M1d0dERMKlQbIDEBGRxFPxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVf5E9mNkkM/tpsuMQiSfTdf5ytJhZKdAS2FWruaO7f3wErxkBnnX3NkcWXf1kZr8B1rr7XcmORdKLjvzlaLvQ3ZvWehx24T8azCwjmds/EmbWMNkxSPpS8ZeEMLPuZvYXM9tqZsuDI/rqed81s/fNbJuZ/cPMvh+0NwH+CLQys+3Bo5WZ/cbMfl5r/YiZra01XWpmt5vZCqDMzDKC9WaY2adm9pGZjTxArDWvX/3aZnabmW00s/VmdomZ9TezD8zsMzP7Sa11x5nZdDN7McjnbTPrXGv+KWYWDd6H98zsoj22+yszm2NmZcB1wFXAbUHufwiWu8PM/h68/kozG1jrNYaZ2f8zs1+a2ZYg13615p9gZr82s4+D+S/XmneBmZUEsf3FzDrVeQdLvaPiL3FnZq2BV4CfAycAo4EZZtY8WGQjcAHwFeC7wANm1sXdy4B+wMeH8U1iCDAAOB6oAv4ALAdaA72BH5nZ+XV8rROBY4J1xwBPAFcDXYFvAmPMLK/W8hcD04Jcfwu8bGaNzKxREMc8oAXwQ+A5M/t6rXWvBO4FjgOeBp4DfhHkfmGwzN+D7WYBdwPPmllOrdc4C1gFZAO/AJ40MwvmPQMcC5wWxPAAgJl1AZ4Cvg80Ax4HZplZ4zq+R1LPqPjL0fZycOS4tdZR5dXAHHef4+5V7v4asBToD+Dur7j73z3mdWLF8ZtHGMfD7r7G3cuBM4Dm7n6Pu//b3f9BrIAPruNrVQD3unsF8AKxovqQu29z9/eA94DaR8lvufv0YPn/JfbB0T14NAXGB3EsAGYT+6CqNtPd/xy8T1/uKxh3n+buHwfLvAisBs6stcg/3f0Jd98FTAVygJbBB0Q/4AfuvsXdK4L3G+B64HF3X+zuu9x9KrAziFnSUL3tD5WUdYm7/2mPtpOA/zazC2u1NQIWAgTdEmOBjsQOSI4F3jnCONbssf1WZra1VltD4P/q+Fqbg0IKUB783VBrfjmxor7Xtt29KuiSalU9z92rai37T2LfKPYV9z6Z2TXAj4HcoKkpsQ+kap/U2v6O4KC/KbFvIp+5+5Z9vOxJwFAz+2Gttv+qFbekGRV/SYQ1wDPufv2eM4JuhRnANcSOeiuCbwzV3RT7uhytjNgHRLUT97FM7fXWAB+5e4fDCf4wtK1+YmYNgDZAdXdVWzNrUOsDoB3wQa1198x3t2kzO4nYt5bewF/dfZeZlfCf9+tA1gAnmNnx7r51H/Pudfd76/A6kgbU7SOJ8CxwoZmdb2YNzeyY4ERqG2JHl42BT4HK4FtAn1rrbgCamVlWrbYSoH9w8vJE4EcH2f6bwBfBSeDMIIZ8MzvjqGW4u65mdmlwpdGPiHWfLAIWE/vgui04BxABLiTWlbQ/G4Da5xOaEPtA+BRiJ8uB/LoE5e7riZ1Af8zMvhrE8K1g9hPAD8zsLItpYmYDzOy4OuYs9YyKv8Sdu68hdhL0J8SK1hrgVqCBu28DRgK/A7YQO+E5q9a6fwOeB/4RnEdoReyk5XKglNj5gRcPsv1dxIpsIfARsAmYQuyEaTzMBK4gls93gEuD/vV/AxcR63ff
BDwGXBPkuD9PAqdWn0Nx95XA/cBfiX0wFAB/PoTYvkPsHMbfiJ1o/xGAuy8l1u8/MYj7Q2DYIbyu1DP6kZfIUWRm44D27n51smMRORAd+YuIhJCKv4hICKnbR0QkhHTkLyISQil7nf/xxx/v7du3T3YYCVVWVkaTJk2SHUbCKN/0F7acUyHft956a5O7Nz/Ycilb/Fu2bMnSpUuTHUZCRaNRIpFIssNIGOWb/sKWcyrka2b/rMty6vYREQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEYmTa6+9lhYtWpCfn1/T9tlnn1FUVESHDh0oKipiy5YtALg7I0eOpH379nTq1Im33367Zp3bb7+d/Px88vPzefHFF49KbHEr/mY20szeN7MZZvZXM9tpZqPjtT0RkVQzbNgw5s6du1vb+PHj6d27N6tXr6Z3796MHz8egD/+8Y+sXr2a1atXM3nyZEaMGAHAK6+8wttvv01JSQmLFy+muLiYL7744ohji+cA7jcA/YAy4CTgkkNZubxiF7l3vBKPuFLWqIJKhoUoZ+Wb/sKWc+18S8cP4Fvf+halpaW7LTNz5kyi0SgAQ4cOJRKJMGHCBGbOnMk111yDmdG9e3e2bt3K+vXrWblyJT169CAjI4OMjAw6d+7M3LlzGTRo0BHFGpcjfzObBOQBs4Cr3H0JUBGPbYmI1CcbNmwgJycHgJycHDZu3AjAunXraNu2bc1ybdq0Yd26dXTu3Jk//vGP7Nixg02bNrFw4ULWrFlzxHHE5cjf3X9gZn2Bnu6+KR7bEBFJJ+6+V5uZ0adPH5YsWcI555xD8+bNOfvss8nIOPLSHc9un0NmZsOB4QDZ2c0ZU1CZ5IgSq2Vm7GtjWCjf9Be2nGvnW92188knn1BWVlYz/ZWvfIUZM2bQrFkzNm/ezHHHHUc0GqVBgwa8+uqrVFbG1l+9ejWlpaVs27aNc889l3PPPReAn/3sZ5SXl9e83uFKqeLv7pOByQDt8tr7/e+kVHhxN6qgkjDlrHzTX9hyrp1v6VWR2N/SUpo0aUIkEpu+4oorWL16NZdddhnjx49n8ODBRCIRysrKmDhxIvfccw+LFy/mxBNP5LLLLmPXrl1s3bqVZs2asWLFCjZs2MDo0aOP/Ojf3ePyAEqB7FrT44DRdV2/Y8eOHjYLFy5MdggJpXzTX9hy3jPfwYMH+4knnugZGRneunVrnzJlim/atMl79erl7du39169evnmzZvd3b2qqspvuOEGz8vL8/z8fF+yZIm7u5eXl/spp5zip5xyip911lm+bNmyA8YALPU61Ni4fySb2YnAUuArQJWZ/Qg41d2P/FolEZEU9vzzz++zff78+Xu1mRmPPvroXu3HHHMMK1euPOqxxa34u3turck28dqOiIgcOv3CV0QkhFT8RURCSMVfRCSEVPxFREJIxV9EJIRU/EVEQkjFX0QkhFT8RURCSMVfRCSEVPxFREJIxV9EJIRU/EVEQkjFX0QkhFT8RURCSMVfRCSEVPxFJO088MADnHbaaeTn5zNkyBC+/PJLFixYQJcuXcjPz2fo0KE1Y+VGo1GysrIoLCyksLCQe+65J8nRJ0bcBnMxs5HACGIjeDUFPgpm/d7dD/rullfsIveOV+IVXkoaVVDJsBDlrHzTX6JzLh0/gHXr1vHwww+zcuVKMjMzGTRoEL/97W8ZO3Ys8+fPp2PHjowZM4apU6dy3XXXAfDNb36T2bNnJyzOVBDPI/8bgP7AVcD/uXth8AjHx6qIJE1lZSXl5eVUVlayY8cOmjRpQuPGjenYsSMARUVFzJgxI8lRJldcir+ZTQLygFnA6fHYhojIvrRu3ZrRo0fTrl07cnJyyMrKYtCgQVRUVLB06VIApk+fzpo1a2rW+etf/0rn
zp3p168f7733XrJCTyiLDfYehxc2KwW6AfnADGAt8DEw2t33+e6a2XBgOEB2dvOuYx58Ii6xpaqWmbChPNlRJI7yTX+JzrmgdRbbtm1j7NixjBkzhqZNmzJu3Dh69OhBq1atePzxx6moqKBbt24sWrSIJ554grKyMho0aEBmZiaLFi1i4sSJPPvss4e1/e3bt9O0adOjnNWh6dmz51vu3u1gyyWi+P8bqHL37WbWH3jI3TscbP12ee29waCH4hJbqhpVUMn978TtNEzKUb7pL9E5l44fwLRp05g7dy5PPvkkAE8//TSLFi3iscceq1lu3rx5TJkyhd/97nd7vUZubi5Lly4lOzv7kLcfjUaJRCKHHf/RYGZ1Kv5x3yvu/kWt53PM7DEzy3b3TQdaL7NRQ1aNHxDv8FJKNBql9KpIssNIGOWb/pKRc7t27Vi0aBE7duwgMzOT+fPn061bNzZu3EiLFi3YuXMnEyZM4M477wTgk08+oWXLlpgZb775JlVVVTRr1iyhMSdD3Iu/mZ0IbHB3N7MziZ1n2Bzv7YpIOJ111llcfvnldOnShYyMDE4//XSGDx/OXXfdxezZs6mqqmLEiBH06tULiPX//+pXvyIjI4PMzExeeOEFzCzJWcRfIr6PXQ6MMLNKoBwY7PHqaxIRAe6++27uvvvu3dqKi4spLi7ea9mbbrqJm266KVGhpYy4FX93zw2eTgweIiKSIvQLXxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EUkrDzzwAKeddhr5+fkMGTKEL7/8kgULFtClSxfy8/MZOnQolZWVALg7I0eOpH379nTq1Im33347ydEnTjzH8B0JjADeBp4AHgQaAZvcvcfB1tcYvulP+aa/ROZcOn4A69at47zzzmPlypVkZmYyaNAg+vbty9ixY5k/fz4dO3ZkzJgxnHTSSVx33XXMmTOHRx55hDlz5rB48WJuvvlmFi9efNgx1KcxfON55H8D0B+4EXgMuMjdTwP+O47bFJGQq6yspLy8nMrKSnbs2EGTJk1o3LgxHTt2BKCoqIgZM2YAMHPmTK655hrMjO7du7N161bWr1+fzPATJi7F38wmAXnALGLF//fu/i8Ad98Yj22KiLRu3ZrRo0fTrl07cnJyyMrKYtCgQVRUVLB06VIgNmbvmjVrAFi3bh1t27atWb9NmzasW7cuKbEnWly+j7n7D8ysL9ATuAtoZGZR4DjgIXd/el/rmdlwYDhAdnZzxhRUxiO8lNUyM/Y1OSyUb/pLZM7RaJRt27YxdepUnn32WZo2bcq4ceO46667uO2227j22mupqKigW7dufPnll0SjUTZt2sSyZctqzgFs2bKFt956i+3btx9WDNu3bycajR7FrOInEZ1xGUBXoDeQCfzVzBa5+wd7Lujuk4HJEOvzV/9oelO+6S+hff5XRZg2bRqnn346l1xyCQAff/wxixYt4t577+XGG28EYN68eezcuZNIJELnzp3Jzs6u6acvKyvjoosuIicn57BiSIU+/7pKxF5ZS+wkbxlQZmZvAJ2BvYp/bZmNGrJq/IAEhJc6otEopVdFkh1Gwijf9JfonNu1a8eiRYvYsWMHmZmZzJ8/n27durFx40ZatGjBzp07mTBhAnfeeScAF110ERMnTmTw4MEsXryYrKyswy789U0iLvWcCXzTzDLM7FjgLOD9BGxXRELmrLPO4vLLL6dLly4UFBRQVVXF8OHDKS4u5pRTTqFTp05ceOGF9OrVC4D+/fuTl5dH+/btuf7663nssceSnEHixP3I393fN7O5wAqgCpji7u/Ge7siEk533303d999925txcXFFBcX77WsmfHoo48mKrSUErfi7+65tZ4XA3u/8yIikhT6ha+ISAip+IuIhJCKv4hICKn4i4iEkIq/iEgIqfiLiISQir+I
SAip+IuIhJCKv4hICKn4i4iEkIq/iEgIHXLxN7OvmlmneAQjIiKJUafib2ZRM/uKmZ0ALAd+bWb/G9/QREQkXup65J/l7l8AlwK/dveuwLfjF5aIiMRTXYt/hpnlAIOA2XGMR0TqsVWrVlFYWFjzGDBgAA8++CAlJSV0796dwsJCunXrxptvvrnbekuWLKFhw4ZMnz49SZGHT13v538P8CrwZ3dfYmZ5wOoDrWBmI4ERQLtay2YApwDN3f2zwwtZRFLV17/+dUpKSgDYtWsXzZs3Z+DAgVx//fWMHTuWfv36MWfOHG677baagc537drF7bffzvnnn5/EyMOnTsXf3acB02pN/wO47CCr3QD0c/ePqhvM7ELglroU/vKKXeTe8UpdwksbowoqGRainJVveindY8zt+fPn06pVK0466STMjC+++AKAzz//nFatWtUs98gjj3DZZZexZMmShMYbdnUq/mbWEfgV0NLd84OrfS5y95/vZ/lJQB4wy8yecvcHgllDgOePQtwikuJeeOEFevfuDcCDDz7I+eefz+jRo6mqquIvf/kLAOvWreOll15iwYIFKv4JZu5+8IXMXgduBR5399ODtnfdPf8A65QC3dx9UzB9LLAWaL+/I38zGw4MB8jObt51zINPHFo29VzLTNhQnuwoEkf5ppeC1lk1zysqKrj88suZOHEibdu25eGHH6Zz58706NGDhQsXMnv2bO6//37GjRvHoEGDOPXUUxk/fjxnn302PXr0SGIWR2b79u00bdo0qTH07NnzLXfvdrDl6lr8l7j7GWa2rFbxL3H3wgOsU8ruxf8K4Gp3v7AuCbTLa+8NBj1Ul0XTxqiCSu5/J27DKqcc5Zteanf7zJw5k0cffZSf/OQnRCIRsrKy2Lp1K2aGu5OVlcUXX3zBySefTHUN2rRpE8ceeyyTJ0/mkksuSVYaRyQajRKJRJIag5nVqfjX9V/iJjP7GuDBi18OrD/EmAZzCF0+mY0asmqPPsR0F41GKb0qkuwwEkb5pq/nn3+eIUOG1Ey3atWK119/nUgkwoIFC+jQoQMAH31Uc0qQYcOGccEFF9Tbwl/f1LX43whMBr5hZuuAj4Cr6roRM8sCegBXH3KEIlKv7Nixg9dee43HH3+cZcuWAfDEE09w8803U1lZyTHHHMPkyZOTHKUctPibWQNi3TffNrMmQAN333aI2xkIzHP3ssMJUkTqj2OPPZbNmzfv1nbeeefx1ltvHXC93/zmN3GMSvZ00B95uXsVcFPwvKyuhd/dc6v7+939N+4++IgiFRGRo6auv/B9zcxGm1lbMzuh+hHXyEREJG7q2ud/bfD3xlptTuxafhERqWfq+gvfk+MdiIiIJE5df+F7zb7a3f3poxuOiIgkQl27fc6o9fwYoDfwNqDiLyJSD9W12+eHtaeD6/afiUtEIiISd4c7hu8OoMPRDERERBKnrn3+fyC4tQOxD4xTqXWLZxERqV/q2uf/y1rPK4F/uvvaOMQjIiIJUNdun/7u/nrw+LO7rzWzCXGNTERE4qauxb9oH239jmYgIiKSOAfs9jGzEcSGY8wzsxW1Zh0H/DmegYmISPwcrM//t8AfgfuAO2q1b9MA7CIi9dcBi7+7fw58TmzsXcysBbEfeTU1s6bu/q/4hygiIkdbnfr8zexCM1tNbBCX14FSYt8IREItNzeXgoICCgsL6dYtNnLeuHHjaN26NYWFhRQWFjJnzhwAXnvttZq2wsJCGjRoQElJSTLDlxCr66WePwe6A39y99PNrCfBt4H9MbORwAjgG8A7QfN2YIS7Lz/MeEVSzsKFC8nOzt6t7ZZbbmH06NG7tRUVFXHvvfcC8M4773DxxRdTWLjfYbBF4qquxb/C3TebWQMza+DuC+twqecNxK4IygHed/ctZtaP2HCQZx1sg+UVu8i945U6hpceRhVUMixEOdfnfEuPcHzpPce4FUm0ul7qudXMmgL/BzxnZg8R+7HXPpnZJGL3
+p8FnOXuW4JZi4A2RxCvSEoxM/r06UPXrl13G5d24sSJdOrUiWuvvZYtW7bstd6LL76o4i9JZe5+8IViY/eWE/uwuArIAp5z980HWKeU2Ni/m2q1jQa+4e7f2886w4HhANnZzbuOefCJumeSBlpmwobyZEeROPU534LWWQBs2rSJ7OxstmzZwujRoxk5ciRt27YlKysLM+Opp55i8+bN3H777Wzfvp2mTZuycuVKfvnLX/LUU08lOYv4q845LFIh3549e77l7t0Otlxd7+pZZmYnAR3cfaqZHQs0PJSAgvME1wHnHWA7k4l1C9Eur73f/05de6XSw6iCSsKUc33Ot/SqyF5ty5cvp6KigksvvbSmLS8vjwsuuIBIJEI0GiUSiTBz5ky+973vEYns/RrppjrnsKhP+db1ap/rgenA40FTa+Dlum7EzDoBU4CLD/RtQaQ+KSsrY9u2bTXP582bR35+PuvXr69Z5qWXXiI/P79muqqqimnTpjF48OCExytSW10Pu24EzgQWA7j76uCa/4Mys3bA74HvuPsHdQ0ss1FDVh3hSbX6JhqN7vOIMl3V93w3bNjAwIEDAaisrOTKK6+kb9++fOc736GkpAQzIzc3l8cff7xmnTfeeIM2bdqQl6fhryW56lr8d7r7v80MADPL4D+3eD6YMUAz4LFg/cq69EeJpLq8vDyWL9/7quVnntn/OEeRSIRFixbFMyyROqlr8X/dzH4CZJpZEbHLOP9woBXcPTd4+r3gISIiKaKul3reAXxK7Mda3wfmAHfFKygREYmvg93Vs527/8vdq4AngoeIiNRzBzvyr7mix8xmxDkWERFJkIMVf6v1XJcniIikiYMVf9/PcxERqccOdrVPZzP7gtg3gMzgOcG0u/tX4hqdiIjExcEGczmkWziIiEj9UNdLPUVEJI2o+IuIhJCKv4hICKn4i4iEkIq/iEgIqfiLiISQir+ISAip+IvsR25uLgUFBRQWFtKtW2wIiltvvZVvfOMbdOrUiYEDB7J169aa5VesWMHZZ5/NaaedRkFBAV9++WWyQhc5qLgWfzMbaWbvm9kWM1thZiVmttTM9juOr0gqWbhwISUlJSxduhSAoqIi3n33XVasWEHHjh257777gNhIXldffTWTJk3ivffeIxqN0qhRo2SGLnJA8R49+wagH7GxAMrc3YPxfH8HfONAK5ZX7CL3jlfiHF5qGVVQybAQ5Zyq+ZYeYPjQPn361Dzv3r0706dPB2DevHl06tSJzp07A9CsWbP4BilyhOJ25G9mk4jdCXQWcL27V98Yrgm6SZzUA2ZGnz596Nq1K5MnT95r/lNPPUW/fv0A+OCDDzAzzj//fLp06cIvfvGLRIcrckjiduTv7j8ws75AT3ffZGYDgfuAFsA+D63MbDgwHCA7uzljCirjFV5KapkZOxoOi1TNNxqNAlBcXEx2djZbtmxh9OjRlJeX1xzZP/vss2zdupXWrVsTjUZZtWoVf/rTn5g0aRKNGzdm1KhRNGzYkK5du9a87vbt22teOyzClnN9yjfe3T413P0l4CUz+xbwM+Db+1hmMjAZoF1ee7//nYSFlxJGFVQSppxTNd/SqyJ7tS1fvpyKigoikQhTp07lvffeY/78+Rx77LEAfPLJJ5SXl3PxxRcDsGTJEqqqqohE/vNa0Wh0t+kwCFvO9SnfhF/t4+5vAF8zs+xEb1ukrsrKyti2bVvN83nz5pGfn8/cuXOZMGECs2bNqin8AOeffz4rVqxgx44dVFZW8vrrr3PqqacmK3yRg0rIYZeZtQf+Hpzw7QL8F7D5QOtkNmrIqgOceEtH0Wh0n0ed6SqV892wYQMDBw4EYlfyXHnllfTt25f27duzc+dOioqKgNhJ30mTJvHVr36VH//4x5xxxhmYGf3792fAgHD9+5X6JVHfuS8DrjGzCqAcuKLWCWCRlJOXl8fy5cv3av/www/3u87VV1/N1VdfHc+wRI6auBZ/d88Nnk4I
HiIikgL0C18RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfxGREFLxFxEJIRV/EZEQUvEXEQkhFX8RkRBS8RcRCSEVfwm13NxcCgoKKCwspFu3bgB89tlnFBUV0aFDB4qKitiyZQsAzz33HJ06daJTp06cc845+7zfv0h9Edfib2Yjzex9M3sumD7DzHaZ2eXx3K7IoVi4cCElJSUsXboUgPHjx9O7d29Wr15N7969GT9+PAAnn3wyr7/+OitWrOCnP/0pw4cPT2bYIkck3iN53QD0c/ePzKwhsQFdXq3LiuUVu8i945W4BpdqRhVUMixEOScz39IDDBE6c+ZMotEoAEOHDiUSiTBhwgTOOeecmmW6d+/O2rVr4x2mSNzE7cjfzCYBecAsM7sF+CEwA9gYr22KHCozo0+fPnTt2pXJkycDsfF7c3JyAMjJyWHjxr3/yT755JP069cvobGKHE1xO/J39x+YWV+gJ9AY+C3QCzhjf+uY2XBgOEB2dnPGFFTGK7yU1DIzdjQcFsnMt/rIvri4mOzsbLZs2cLo0aMpLy+nsrKyZj6w1/SyZct45JFHePjhh3drP5jt27cf0vLpIGw516d8EzWA+4PA7e6+y8z2u5C7TwYmA7TLa+/3v5Oo8FLDqIJKwpRzMvMtvSqyV9vy5cupqKigdevWfP3rXycnJ4f169fTqlUrIpHY8itWrGDixIm89tprdOzY8ZC2GY1Ga14nLMKWc33KN1FX+3QDXjCzUuBy4DEzuyRB2xbZp7KyMrZt21bzfN68eeTn53PRRRcxdepUAKZOncrFF18MwL/+9S8uvfRSnnnmmUMu/CKpJiGHXe5+cvVzM/sNMNvdXz7QOpmNGrLqACfl0lE0Gt3nEWm6Sna+GzZsYODAgUCsa+fKK6+kb9++nHHGGQwaNIgnn3ySdu3aMW3aNADuueceNm/ezA033ABARkZGzRVCIvVNePoYRPaQl5e3z2v1mzVrxvz58/dqnzJlClOmTElEaCJxF9fi7+65+2gbFs9tiojIwekXviIiIaTiLyISQir+IiIhpOIvIhJCKv4iIiGk4i8iEkIq/iIiIaTiLyISQir+IiIhpOIvIhJCKv4iIiFhwT2uAAAJuklEQVSk4i8iEkIq/iIiIaTiLyISQir+kjT//ve/OfPMM+ncuTOnnXYaY8eOBWDBggV06dKF/Px8hg4dSmVlbJzf4uJiCgsLKSwsJD8/n4YNG/LZZ58lMwWReiuuxd/MRprZ+2b2nJk9bGYfmtkKM+sSz+1K/dCoUSMWLFjA8uXLKSkpYe7cufzlL39h6NChvPDCC7z77rucdNJJNUMq3nrrrZSUlFBSUsJ9991Hjx49OOGEE5KchUj9FO+RvG4A+gGnAD8EOgBnAb8K/u5XecUucu94Jc7hpZZRBZUMC0nOpeMHYGY0bdoUgIqKCioqKmjYsCGNGzeuGSO3qKiI++67j+uuu2639Z9//nmGDBmS8LhF0kXcjvzNbBKQB8wCXgKe9phFwPFmlhOvbUv9sWvXLgoLC2nRogVFRUWceeaZVFRU1IyNO336dNasWbPbOjt27GDu3LlcdtllyQhZJC3Erfi7+w+Aj4GewGtA7f/Ba4HW8dq21B8NGzakpKSEtWvX8uabb/Lee+/xwgsvcMstt3DmmWdy3HHHkZGx+xfUP/zhD5x77rnq8hE5AokawN320eZ7LWQ2HBgOkJ3dnDEFlfGOK6W0zIx1/YRBNBpl+/btRKPRmrbc3FweffRRrrjiCn72s58BsGTJErKysnZbbuLEifTo0WO3tvpgz3zDIGw516d8zX2vGnz0XtysFOgG3AtE3f35oH0VEHH39ftbt11ee28w6KG4xZaKRhVUcv87ifo8Tq7S8QN4+eWXiUQiHH/88ZSXl9OnTx9uv/12zjzzTFq0aMHOnTvp378/d955J7169QLg888/5+STT2bNmjU0adIkyVkcmmg0
SiQSSXYYCRW2nFMhXzN7y927HWy5RFWaWcBNZvYCsRO9nx+o8ANkNmrIqvEDEhJcqohGo5ReFUl2GAmzefNmevbsya5du6iqqmLQoEFccMEF3HrrrcyePZuqqipGjBhRU/gBXnrpJfr06VPvCr9IqklU8Z8D9Ac+BHYA303QdiWFfe1rX2PZsmV7tRcXF1NcXLzPdYYNG8awYcPiHJlI+otr8Xf33FqTN8ZzWyIiUnf6ha+ISAip+IuIhJCKv4hICKn4i4iEkIq/iEgIqfiLiISQir+ISAip+IuIhJCKv4hICKn4i4iEkIq/iEgIqfiLiISQir+ISAip+IuIhJCKv4hICKn4i4iEkIq/iEgIqfiLiISQir+ISAiZuyc7hn0ys23AqmTHkWDZwKZkB5FAyjf9hS3nVMj3JHdvfrCF4jqA+xFa5e7dkh1EIpnZ0jDlrHzTX9hyrk/5qttHRCSEVPxFREIolYv/5GQHkARhy1n5pr+w5Vxv8k3ZE74iIhI/qXzkLyIicaLiLyISQilZ/M2sr5mtMrMPzeyOZMdzNJhZWzNbaGbvm9l7ZnZz0H6Cmb1mZquDv18N2s3MHg7egxVm1iW5GRweM2toZsvMbHYwfbKZLQ7yfdHM/itobxxMfxjMz01m3IfLzI43s+lm9rdgX5+dzvvYzG4J/j2/a2bPm9kx6bSPzewpM9toZu/Wajvk/WlmQ4PlV5vZ0GTksqeUK/5m1hB4FOgHnAoMMbNTkxvVUVEJjHL3U4DuwI1BXncA8929AzA/mIZY/h2Cx3DgV4kP+ai4GXi/1vQE4IEg3y3AdUH7dcAWd28PPBAsVx89BMx1928AnYnlnpb72MxaAyOBbu6eDzQEBpNe+/g3QN892g5pf5rZCcBY4CzgTGBs9QdGUrl7Sj2As4FXa03/D/A/yY4rDnnOBIqI/Yo5J2jLIfbjNoDHgSG1lq9Zrr48gDbE/nP0AmYDRuzXjxl77mvgVeDs4HlGsJwlO4dDzPcrwEd7xp2u+xhoDawBTgj22Wzg/HTbx0Au8O7h7k9gCPB4rfbdlkvWI+WO/PnPP6hqa4O2tBF83T0dWAy0dPf1AMHfFsFi6fA+PAjcBlQF082Are5eGUzXzqkm32D+58Hy9Uke8Cnw66Cra4qZNSFN97G7rwN+CfwLWE9sn71Feu9jOPT9mZL7ORWLv+2jLW2uRzWzpsAM4Efu/sWBFt1HW715H8zsAmCju79Vu3kfi3od5tUXGUAX4FfufjpQxn+6BPalXuccdF1cDJwMtAKaEOv62FM67eMD2V9+KZl3Khb/tUDbWtNtgI+TFMtRZWaNiBX+59z990HzBjPLCebnABuD9vr+PpwLXGRmpcALxLp+HgSON7Pqe0rVzqkm32B+FvBZIgM+CtYCa919cTA9ndiHQbru428DH7n7p+5eAfweOIf03sdw6PszJfdzKhb/JUCH4IqB/yJ2AmlWkmM6YmZmwJPA++7+v7VmzQKqz/4PJXYuoLr9muAKgu7A59VfNesDd/8fd2/j7rnE9uECd78KWAhcHiy2Z77V78PlwfJJPzo6FO7+CbDGzL4eNPUGVpKm+5hYd093Mzs2+PddnW/a7uPAoe7PV4E+ZvbV4NtSn6AtuZJ90mE/J1j6Ax8AfwfuTHY8Rymn84h91VsBlASP/sT6POcDq4O/JwTLG7Grnv4OvEPsioqk53GYuUeA2cHzPOBN4ENgGtA4aD8mmP4wmJ+X7LgPM9dCYGmwn18GvprO+xi4G/gb8C7wDNA4nfYx8Dyx8xkVxI7grzuc/QlcG+T9IfDdZOfl7rq9g4hIGKVit4+IiMSZir+ISAip+IuIhJCKv4hICKn4i4iEUCoP4C4SF2a2i9ileNUucffSJIUjkhS61FNCx8y2u3vTBG4vw/9zrxuRlKBuH5E9mFmOmb1hZiXBfeq/GbT3NbO3zWy5mc0P2k4ws5eD+7cvMrNOQfs4M5tsZvOApy02rkGxmS0Jlv1+
ElMUUbePhFKmmZUEzz9y94F7zL+S2G2I7w3GlzjWzJoDTwDfcvePgnu0Q+wXrsvc/RIz6wU8TexXvgBdgfPcvdzMhhP7uf8ZZtYY+LOZzXP3j+KZqMj+qPhLGJW7e+EB5i8BngpuxPeyu5eYWQR4o7pYu3v1DcnOAy4L2haYWTMzywrmzXL38uB5H6CTmVXf8yaL2KAfKv6SFCr+Intw9zfM7FvAAOAZMysGtrLv2/Ae6Ha9ZXss90N3T/4NvURQn7/IXszsJGJjETxB7E6sXYC/Aj3M7ORgmepunzeAq4K2CLDJ9z1Ow6vAiODbBGbWMRjoRSQpdOQvsrcIcKuZVQDbgWvc/dOg3/73ZtaA2D3ci4BxxEbuWgHs4D+3+t3TFGLDAb4d3P74U+CSeCYhciC61FNEJITU7SMiEkIq/iIiIaTiLyISQir+IiIhpOIvIhJCKv4iIiGk4i8iEkL/H3wgHJWe/4leAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from xgboost import plot_importance\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Call this only after the model has been fitted; the chart is drawn on an\n",
    "# explicitly created axes and then shown.\n",
    "fig, ax = plt.subplots()\n",
    "plot_importance(xgb_clf2, ax=ax)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第二部分"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# XGBOOST参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.学习率，估计器数目"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 20 candidates, totalling 100 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    9.1s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.479729 using {'learning_rate': 0.01, 'n_estimators': 300}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   14.9s finished\n"
     ]
    }
   ],
   "source": [
    "#搜索学习率和估计器数目\n",
    "#其他参数设置为默认值\n",
    "model1_1 = XGBClassifier(\n",
    "    max_depth=5,\n",
    "    min_child_weight=1,\n",
    "    gamma=0,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "\n",
    "#网格搜索参数列表\n",
    "learning_rate = [ 0.001, 0.01, 0.1, 0.2]\n",
    "n_estimators = [100, 200, 300, 500, 1000]\n",
    "param1 = dict(learning_rate=learning_rate, n_estimators=n_estimators)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "\n",
    "#scoring 参数根据实际情况设定，roc_auc 或者 neg_log_loss\n",
    "grid_search = GridSearchCV(model1_1, param_grid=param1, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "# grid_search = GridSearchCV(model1_1, param_grid=param1, scoring=\"roc_auc\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X_train, y_train)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "设置学习率为上述搜索到的学习率的值，具体查看最优的估计器数目是多少\n",
    "\n",
    "这一步也可以不要，直接使用上述的最好n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\tvalidation_0-logloss:0.690588\n",
      "Will train until validation_0-logloss hasn't improved in 100 rounds.\n",
      "[50]\tvalidation_0-logloss:0.584058\n",
      "[100]\tvalidation_0-logloss:0.532183\n",
      "[150]\tvalidation_0-logloss:0.505183\n",
      "[200]\tvalidation_0-logloss:0.492587\n",
      "[250]\tvalidation_0-logloss:0.485973\n",
      "[300]\tvalidation_0-logloss:0.484974\n",
      "[350]\tvalidation_0-logloss:0.486333\n",
      "Stopping. Best iteration:\n",
      "[282]\tvalidation_0-logloss:0.484596\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "       colsample_bynode=1, colsample_bytree=0.8, gamma=0,\n",
       "       learning_rate=0.01, max_delta_step=0, max_depth=5,\n",
       "       min_child_weight=1, missing=None, n_estimators=400, n_jobs=1,\n",
       "       nthread=4, objective='binary:logistic', random_state=0, reg_alpha=0,\n",
       "       reg_lambda=1, scale_pos_weight=1, seed=27, silent=None,\n",
       "       subsample=0.8, verbosity=1)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model1_2 = XGBClassifier(\n",
    "    learning_rate =0.01,\n",
    "    n_estimators=400,\n",
    "    max_depth=5,\n",
    "    min_child_weight=1,\n",
    "    gamma=0,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27\n",
    ")\n",
    "\n",
    "eval_set = [(X_test, y_test)]\n",
    "model1_2.fit(\n",
    "    X_train, y_train,\n",
    "    early_stopping_rounds=100, \n",
    "    eval_metric=\"logloss\", \n",
    "#     eval_metric=\"auc\", \n",
    "    eval_set=eval_set, \n",
    "    verbose=50)\n",
    "#verbose是指，每隔50个estimator才打印一次成绩"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看训练出来的模型\n",
    "\n",
    "在训练集  测试集  上的交叉验证成绩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       neg_log_loss  accuracy        f1   roc_auc\n",
      "Train      -0.49006  0.764489  0.641571  0.819106\n",
      "Test       -0.55298  0.692769  0.550016  0.779069\n"
     ]
    }
   ],
   "source": [
    "cv_score_train_test(model1_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**结论**\n",
    "- 最佳学习率 0.01 \n",
    "- 估计其数目 300（282）\n",
    "\n",
    "**如果scoring参数设置为aoc， **\n",
    "\n",
    "那么n_estimator=50即可在测试集上获得比较好的成绩\n",
    "\n",
    "\n",
    "**如果scoring设置为neg_log_loss**\n",
    "\n",
    "那么需要设置n_estimator需要设置为300左右"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. max_depth 和 min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 20 candidates, totalling 100 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    6.1s\n",
      "[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    9.4s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.471508 using {'max_depth': 3, 'min_child_weight': 5}\n"
     ]
    }
   ],
   "source": [
    "#搜索学习率和估计器数目\n",
    "#其他参数设置为默认值\n",
    "model2 = XGBClassifier(\n",
    "    learning_rate=0.01,\n",
    "    n_estimators=300,\n",
    "    gamma=0,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "max_depth = [ i for i in range(1, 6)]\n",
    "min_child_weight = [i for i in range(4, 8)]\n",
    "param2 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "grid_search = GridSearchCV(model2, param_grid=param2, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X_train, y_train)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看模型在训练集、测试集上的交叉验证成绩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       neg_log_loss  accuracy        f1   roc_auc\n",
      "Train     -0.475166  0.758758  0.614573  0.830570\n",
      "Test      -0.521323  0.751385  0.633099  0.803339\n"
     ]
    }
   ],
   "source": [
    "cv_score_train_test(grid_search.best_estimator_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**结论：**\n",
    "- 'max_depth': 3\n",
    "- 'min_child_weight': 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.gamma参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 7 candidates, totalling 35 fits\n",
      "Best: -0.471190 using {'gamma': 0.7}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    6.1s finished\n"
     ]
    }
   ],
   "source": [
    "model3 = XGBClassifier(\n",
    "    learning_rate=0.01,\n",
    "    n_estimators=300,\n",
    "    max_depth=3,\n",
    "    min_child_weight=5,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "gamma = [ i/10.0 for i in range(5, 12)]\n",
    "param3 = dict(gamma=gamma)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "grid_search = GridSearchCV(model3, param_grid=param3, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X_train, y_train)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       neg_log_loss  accuracy        f1   roc_auc\n",
      "Train     -0.475537  0.758758  0.614573  0.829718\n",
      "Test      -0.520716  0.747385  0.630400  0.803452\n"
     ]
    }
   ],
   "source": [
    "# 查看模型在训练集、测试集上的交叉验证成绩\n",
    "cv_score_train_test(grid_search.best_estimator_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.subsample 和 colsample_bytree 参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 16 candidates, totalling 80 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.4s\n",
      "[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:   10.4s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.473702 using {'colsample_bytree': 0.7, 'subsample': 0.8}\n"
     ]
    }
   ],
   "source": [
    "model4 = XGBClassifier(\n",
    "    learning_rate=0.01,\n",
    "    n_estimators=300,\n",
    "    max_depth=4,\n",
    "    min_child_weight=4,\n",
    "    gamma=0.7,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "subsample = [ i/10.0 for i in range(6, 10)]\n",
    "colsample_bytree  =  [ i/10.0 for i in range(6, 10)]\n",
    "param4 = dict(subsample=subsample, colsample_bytree=colsample_bytree)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "grid_search = GridSearchCV(model4, param_grid=param4, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X, Y)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "再次细化上述两个参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 40 candidates, totalling 200 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.5s\n",
      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   18.5s\n",
      "[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   19.0s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.473702 using {'colsample_bytree': 0.65, 'subsample': 0.8}\n"
     ]
    }
   ],
   "source": [
    "colsample_bytree  =  [ i/100.0 for i in range(65,90,5)]\n",
    "subsample = [ i/100.0 for i in range(55,95,5)]\n",
    "param4_2 = dict(subsample=subsample, colsample_bytree=colsample_bytree)\n",
    "\n",
    "grid_search = GridSearchCV(model4, param_grid=param4_2, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X, Y)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**结论**\n",
    "\n",
    "- 'colsample_bytree': 0.65, \n",
    "- 'subsample': 0.8"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.正则化参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 25 candidates, totalling 125 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.7s\n",
      "[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:   12.9s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.473605 using {'reg_alpha': 0.01, 'reg_lambda': 1}\n"
     ]
    }
   ],
   "source": [
    "model5 = XGBClassifier(\n",
    "    learning_rate=0.01,\n",
    "    n_estimators=300,\n",
    "    max_depth=4,\n",
    "    min_child_weight=4,\n",
    "    gamma=0.7,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.65,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "reg_alpha = [1e-5, 1e-2, 0.1, 1, 100]\n",
    "reg_lambda  =  [1e-5, 1e-2, 0.1, 1, 100]\n",
    "param5 = dict(reg_alpha=reg_alpha, reg_lambda=reg_lambda)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "grid_search = GridSearchCV(model5, param_grid=param5, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X, Y)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "再次细化上述参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    8.0s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.473605 using {'reg_alpha': 0.01, 'reg_lambda': 1}\n"
     ]
    }
   ],
   "source": [
    "reg_alpha = [1e-3, 1e-2, 0.1]\n",
    "reg_lambda  =  [0.1, 1, 10]\n",
    "param5_2 = dict(reg_alpha=reg_alpha, reg_lambda=reg_lambda)\n",
    "\n",
    "grid_search = GridSearchCV(model5, param_grid=param5_2, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X, Y)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**结论：**\n",
    "\n",
    "- 'reg_alpha': 0.01, \n",
    "- 'reg_lambda': 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.再次降低学习速率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 4 candidates, totalling 20 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    5.8s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.473605 using {'learning_rate': 0.01}\n"
     ]
    }
   ],
   "source": [
    "model6 = XGBClassifier(\n",
    "    n_estimators=300,\n",
    "    max_depth=4,\n",
    "    min_child_weight=4,\n",
    "    gamma=0.7,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.65,\n",
    "    reg_alpha=0.01,\n",
    "    reg_lambda=1,\n",
    "    objective= 'binary:logistic',\n",
    "    nthread=4,\n",
    "    scale_pos_weight=1,\n",
    "    seed=27)\n",
    "\n",
    "learning_rate = [0.001, 0.01, 0.1, 1]\n",
    "\n",
    "param6 = dict(learning_rate=learning_rate)\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=7)\n",
    "\n",
    "#网格搜索类，要求的param_grid参数，必须是字典，或者字典构成的列表\n",
    "grid_search = GridSearchCV(model6, param_grid=param6, scoring=\"neg_log_loss\", n_jobs=-1, cv=kfold, verbose=1)\n",
    "grid_result = grid_search.fit(X, Y)\n",
    "\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
   ]
  },
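  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**结论：**\n",
    "\n",
    "在 n_estimators=300 且其余参数固定的条件下，候选值 0.001、0.01、0.1、1 中，learning_rate=0.01 的 5 折交叉验证 neg_log_loss 最优（-0.473605），即学习率=0.01 确实是最好的。"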
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 综上，完成所有调参"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n",
      "C:\\Users\\TG\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n",
      "  if diff:\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       neg_log_loss  accuracy        f1   roc_auc\n",
      "Train     -0.477979  0.756760  0.614948  0.827453\n",
      "Test      -0.519663  0.739538  0.605151  0.804260\n"
     ]
    }
   ],
   "source": [
    "cv_score_train_test(grid_search.best_estimator_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "xbg_clf1 与 model6 的模型效果对比：\n",
    "\n",
    "![](https://raw.githubusercontent.com/tangg9646/my_github_image_bed/master/img20191209171001.png)\n",
    "![](https://raw.githubusercontent.com/tangg9646/my_github_image_bed/master/img20191209171026.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
